Code
import eia_api as api
import eia_data
import pandas as pd
import numpy as np
import requests
import json
import os
import datetime
import plotly.express as pxThe goal of the backfill process is to pull the historical data for the required series using the settings.json file. This includes the following steps:
import eia_api as api
import eia_data
import pandas as pd
import numpy as np
import requests
import json
import os
import datetime
import plotly.express as pxraw_json = open("../settings/series.json")
meta_json = json.load(raw_json)
series = pd.DataFrame(meta_json["series"])
api_path = meta_json["api_path"]facets_template = {
"parent" : None,
"subba" : None
}
start = datetime.datetime(meta_json["start"]["year"],
meta_json["start"]["month"],
meta_json["start"]["day"],
meta_json["start"]["hour"])
end = datetime.datetime(meta_json["end"]["year"],
meta_json["end"]["month"],
meta_json["end"]["day"],
meta_json["end"]["hour"])
offset = 2250
eia_api_key = os.getenv('EIA_API_KEY')
meta_path = meta_json["meta_path"]
data_path = meta_json["data_path"]metadata = api.eia_metadata(api_key = eia_api_key, api_path = api_path)
print(metadata.meta.keys())
print(metadata.meta["startPeriod"])
print(metadata.meta["endPeriod"])dict_keys(['id', 'name', 'description', 'frequency', 'facets', 'data', 'startPeriod', 'endPeriod', 'defaultDateFormat', 'defaultFrequency'])
2018-06-19T05
2024-06-16T07
for i in series.index:
facets = facets_template
facets["parent"] = series.at[i, "parent_id"]
facets["subba"] = series.at[i, "subba_id"]
print(facets)
temp = api.eia_backfill(api_key = eia_api_key,
api_path = api_path+ "data",
facets = facets,
start = start,
end = end,
offset = offset)
ts_obj = pd.DataFrame(np.arange(start = start, stop = end + datetime.timedelta(hours = 1), step = datetime.timedelta(hours = 1)).astype(datetime.datetime), columns=["index"])
ts_obj = ts_obj.merge(temp.data, left_on = "index", right_on = "period", how="left")
ts_obj.drop("period", axis = 1, inplace= True)
ts_obj = ts_obj.rename(columns= {"index": "period"})
meta_temp = eia_data.create_metadata(data = ts_obj, start = start, end = end, type = "backfill")
meta_temp["index"] = 1
meta_df = pd.DataFrame([meta_temp])
if i == series.index.start:
data = ts_obj
meta = meta_df
else:
data = data._append(ts_obj)
meta = meta._append(meta_df){'parent': 'CISO', 'subba': 'PGAE'}
{'parent': 'CISO', 'subba': 'SCE'}
{'parent': 'CISO', 'subba': 'SDGE'}
{'parent': 'CISO', 'subba': 'VEA'}
print(meta)
# The initial pull has some missing values
data.head() index parent subba time start \
0 1 CISO PGAE 2024-06-17 04:25:14.127866+00:00 2018-07-01 08:00:00
0 1 CISO SCE 2024-06-17 04:25:40.000588+00:00 2018-07-01 08:00:00
0 1 CISO SDGE 2024-06-17 04:25:56.750293+00:00 2018-07-01 08:00:00
0 1 CISO VEA 2024-06-17 04:26:15.844556+00:00 2018-07-01 08:00:00
end start_act end_act start_match \
0 2024-06-15 01:00:00 2018-07-01 08:00:00 2024-06-15 01:00:00 True
0 2024-06-15 01:00:00 2018-07-01 08:00:00 2024-06-15 01:00:00 True
0 2024-06-15 01:00:00 2018-07-01 08:00:00 2024-06-15 01:00:00 True
0 2024-06-15 01:00:00 2018-07-01 08:00:00 2024-06-15 01:00:00 True
end_match n_obs na type update success \
0 True 52218 98 backfill False False
0 True 52218 98 backfill False False
0 True 52218 98 backfill False False
0 True 52218 98 backfill False False
comments
0 Missing values were found;
0 Missing values were found;
0 Missing values were found;
0 Missing values were found;
| period | subba | subba-name | parent | parent-name | value | value-units | |
|---|---|---|---|---|---|---|---|
| 0 | 2018-07-01 08:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 12522.0 | megawatthours |
| 1 | 2018-07-01 09:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 11745.0 | megawatthours |
| 2 | 2018-07-01 10:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 11200.0 | megawatthours |
| 3 | 2018-07-01 11:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 10822.0 | megawatthours |
| 4 | 2018-07-01 12:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 10644.0 | megawatthours |
# Save the data
d = eia_data.append_data(data_path = data_path, new_data = data, init = True, save = True)
# Save the metadata
meta["success"] = True
meta["update"] = True
m = eia_data.append_metadata(meta_path = meta_path, meta = meta, save = True, init = True)
print(m)Initial data pull
Save the data to CSV file
index parent subba time start \
0 1 CISO PGAE 2024-06-17 04:25:14.127866+00:00 2018-07-01 08:00:00
0 1 CISO SCE 2024-06-17 04:25:40.000588+00:00 2018-07-01 08:00:00
0 1 CISO SDGE 2024-06-17 04:25:56.750293+00:00 2018-07-01 08:00:00
0 1 CISO VEA 2024-06-17 04:26:15.844556+00:00 2018-07-01 08:00:00
end start_act end_act start_match \
0 2024-06-15 01:00:00 2018-07-01 08:00:00 2024-06-15 01:00:00 True
0 2024-06-15 01:00:00 2018-07-01 08:00:00 2024-06-15 01:00:00 True
0 2024-06-15 01:00:00 2018-07-01 08:00:00 2024-06-15 01:00:00 True
0 2024-06-15 01:00:00 2018-07-01 08:00:00 2024-06-15 01:00:00 True
end_match n_obs na type update success \
0 True 52218 98 backfill True True
0 True 52218 98 backfill True True
0 True 52218 98 backfill True True
0 True 52218 98 backfill True True
comments
0 Missing values were found;
0 Missing values were found;
0 Missing values were found;
0 Missing values were found;
We will use Plotly to visualize the series:
d = data.sort_values(by = ["subba", "period"])
p = px.line(d, x="period", y="value", color="subba")
p.show()